version 16.1
clear all
cd "MYPATH\derived\make_analysis_sample"
adopath + ../../ado/

* Close any log left open by an earlier run, then start a fresh one.
capture log close
log using "build.log", replace

* User-written command; presumably resolved through the adopath entry added
* above and responsible for defining the path globals used below -- confirm.
preliminaries

* Output folders: RESULTS gets the task folder plus a figures subfolder,
* TEMP only gets the task folder. Errors (e.g. folder already exists) are
* deliberately ignored via -capture-.
capture mkdir "${RESULTS}\derived\make_analysis_sample"
capture mkdir "${RESULTS}\derived\make_analysis_sample\figures"
capture mkdir "${TEMP}\derived\make_analysis_sample"

* Graph fonts for on-screen and PostScript output.
graph set window fontface default
graph set window fontfacemono "Consolas"
graph set ps fontface default
graph set ps fontfacemono "Consolas"


program define main
	* Entry point of this do-file: runs the single build step.
	build_analysis_sample
end


program build_analysis_sample
	* Builds the analysis sample from the cleaned master dataset:
	*   1. load $DATA\master_dataset_clean and create a monthly date m_year,
	*   2. keep examinations dated 2011-2019,
	*   3. derive outcomes/waves, KUB regime, labels and mother covariates,
	*   4. keep observations with kub_type == 2 and save $DATA\analysis_sample.
	* NOTE(review): the banner below says "with wave 1", but define_kub_type
	* assigns kub_type 0 to wave-1 rows (unless a county override applies), so
	* the final -keep- drops them -- confirm the banner is still accurate.
	di "KUB + 2011-2019, analysis sample (with wave 1)"

	* Paths are quoted so that a $DATA root containing spaces still works.
	use "$DATA\master_dataset_clean", clear
	gen m_year = ym(year, month)
	format m_year %tm

	* Restrictions
	* (a missing undersokningsdatum_yr compares as > 2019 and is dropped too)
	drop if undersokningsdatum_yr < 2011
	drop if undersokningsdatum_yr > 2019
	define_outcomes_waves
	define_kub_type
	label_vars
	merge_mother_chars

	keep if kub_type == 2

	save "$DATA\analysis_sample", replace
end

program define_outcomes_waves
	* Derives outcome indicators, merges county-level wave introduction dates
	* and assigns each observation to a wave.
	* Requires m_year to exist already (used for the end-of-sample censoring).
	* Creates: year_dob, carried_22, no_live, chroma, chroma_detect,
	* chroma_other, healthy, week10_date, wave, kub_score and the nipt_*
	* county-group indicators.
	di "define outcomes"

	* If missing any of these vars, set to zero
	local outcome_vars did_amnio did_cv did_kub did_nipt termination_clean ///
	  stillbirth chrom_ab
	foreach var in `outcome_vars' {
		replace `var' = 0 if mi(`var')
	}

	* more vars
	gen year_dob = bfoddat_yr
	la var year_dob "year of actual birth date (bfoddat)"

	* more pregnancy outcomes
	gen carried_22 = 1 * (stillbirth == 1 | live_birth == 1)
	gen no_live = 1 * (termination_clean == 1 | stillbirth == 1)

	* Live-birth outcomes split by chromosomal-abnormality status
	gen chroma = (live_birth == 1) * (chrom_ab == 1)
	gen chroma_detect = (live_birth == 1) * (chrom_ab_detect == 1)
	gen chroma_other = (live_birth == 1) * (chrom_ab_other == 1)
	gen healthy = (live_birth == 1) * (chrom_ab != 1)

	* set pregnancy outcomes to missing if pregnancy expected due date later than Nov 2019
	* (a missing m_year also satisfies the > comparison and is set to missing)
	foreach var in termination_clean stillbirth chrom_ab carried_22 no_live chroma chroma_detect ///
	  chroma_other healthy {
		replace `var' = . if m_year > ym(2019,11)
	}

	* merge waves into analysis sample
	* (every master row must match a county in waves.dta; unused counties are dropped)
	merge m:1 lan using "$DATA\waves.dta", assert(2 3) keep(3) nogen

	* presumably date_preg is the expected due date, so minus 210 days is
	* roughly gestational week 10 -- confirm against the master dataset.
	gen week10_date = date_preg - 210

	* Wave assignment. Stata treats missing as larger than any number, so an
	* unguarded ">=" would put observations with a missing week10_date into
	* wave 3. The !mi() guards leave wave missing for those observations.
	* A missing wave2_intro still yields wave 1 (x < . is true), as before.
	gen wave = 1 if week10_date < dofm(wave2_intro)
	replace wave = 2 if week10_date >= dofm(wave2_intro) & !mi(week10_date)
	replace wave = 3 if week10_date >= dofm(wave3_intro) & !mi(week10_date)

	* Other useful variables
	gen kub_score = 1 / fetus_risk

	* County groups by NIPT offer (group meaning taken from the variable names only)
	gen nipt_51_200 = inlist(lan, 1, 4, 9, 13, 14, 19, 20, 23)
	gen nipt_hr_cov = inlist(lan, 3, 17, 18, 21, 22)
	gen nipt_51_1000 = inlist(lan,7,12)
	gen nipt_1000 = inlist(lan, 18)
end

program define_kub_type
	* Classifies each observation's KUB offer regime into kub_type:
	*   0 = wave 1, 1 = age-35 threshold, 2 = universal coverage,
	*   3 = age-38 threshold; left missing when no rule applies.
	* Expects lan, wave and week10_date to exist.
	* NOTE(review): "wave > 1" and "week10_date >= ..." are also true for
	* missing values in Stata -- confirm that is acceptable for this data.
	local age35_lans 3, 4, 9, 12, 14, 20, 21, 22, 23, 24
	local universal_lans 1, 5, 6, 7, 8, 13, 17, 18, 19

	* Baseline classification by county once wave 1 is over
	gen kub_type = 0 if wave == 1
	replace kub_type = 1 if wave > 1 & inlist(lan, `age35_lans')
	replace kub_type = 2 if wave > 1 & inlist(lan, `universal_lans')

	* Some counties modified their KUB offers: switches to universal coverage
	replace kub_type = 2 if lan == 22 & week10_date >= mdy(1, 1, 2015)
	replace kub_type = 2 if lan == 3 & week10_date >= mdy(1, 1, 2016)
	replace kub_type = 2 if lan == 23 & week10_date >= mdy(4, 1, 2016)
	replace kub_type = 2 if lan == 20 & week10_date >= mdy(11, 1, 2017)

	* ... and switches (permanent or temporary) to the age-35 threshold
	replace kub_type = 1 if lan == 19 & week10_date >= mdy(1, 1, 2018)
	replace kub_type = 1 if lan == 17 & ///
	  (inrange(week10_date, mdy(6, 1, 2014), mdy(9, 1, 2014)) | ///
	   inrange(week10_date, mdy(4, 1, 2015), mdy(9, 1, 2015)))

	* County 10 has its own regime during wave 2
	replace kub_type = 3 if lan == 10 & wave == 2

	label define kub_t ///
	  1 "Age 35 KUB threshold" ///
	  2 "Universal KUB coverage" ///
	  3 "Age 38 KUB threshold"
	label values kub_type kub_t
end

program label_vars
	* Attaches variable labels to test-uptake, outcome and KUB-score
	* variables, and defines/attaches county names as value labels on lan.
	* All labelled variables must already exist in memory.

	* Testing uptake
	* NOTE(review): "Did_nipt" keeps its underscore unlike the sibling labels
	* -- confirm whether that is intended before changing it.
	label variable did_invasive "Did invasive"
	label variable did_kub "Did kub"
	label variable did_nipt "Did_nipt"
	label variable subt "Any Subsequent testing"

	* KUB scores
	label variable kub_score "Kub Risk (1 / fetus_risk)"
	label variable kombinerad_risk_t13_18 "Raw KUB score (1/the probability) for t13 or 18"
	label variable kombinerad_risk_t21 "Raw KUB score (1/the probability) for t21"

	* Pregnancy outcomes
	label variable carried_22 "Carried 22 weeks"
	label variable no_live "Termination or stillborn"
	label variable chroma "Live birth, diagnosed chromosomal abnormality"
	label variable chroma_detect "Live birth, chrom ab detectable by NIPT"
	label variable chroma_other "Live birth, chrom ab not detectable by NIPT"
	label variable healthy "Live birth, no diagnosed chromosomal abnormality"

	* County (lan) names, built up in chunks under one label name
	label define lan_names 1 "Stockholm" 3 "Uppsala" 4 "Södermanland" 5 "Östergötland"
	label define lan_names 6 "Jönköping" 7 "Kronoberg" 8 "Kalmar" 9 "Gotland" ///
	  10 "Blekinge" 12 "Skåne" 13 "Halland", add
	label define lan_names 14 "Västra Götaland" 17 "Värmland" 18 "Örebro" ///
	  19 "Västmanland" 20 "Dalarna" 21 "Gävleborg", add
	label define lan_names 22 "Västernorrland" 23 "Jämtland" 24 "Västerbotten" ///
	  25 "Norrbotten", add
	label values lan lan_names
end


program merge_mother_chars
	* Adds mother-level covariates to the data in memory, keyed on lopnr
	* (and year where applicable): income tiles, education in the year
	* variable's year and in the year before, and a Swedish/foreign-born flag.

	* Income: every master row must match; unmatched using rows are dropped
	merge m:1 lopnr year using "$DATA\income_tiles.dta", assert(2 3) keep(3) nogen
	drop lopnr_yr_source

	* Education in the current value of year
	merge m:1 lopnr year using "$MYPATH\MYPATH.dta", ///
	  keepusing(educ) assert(1 2 3) keep(1 3) nogen
	label define lab_educ 1 "No college" 2 "Some college" 3 "Full college"
	replace educ = educ + 1 if !mi(educ) // code as 1, 2, 3 instead of 0, 1, 2
	label values educ lab_educ
	rename educ due_dt_year_educ

	* Education one year earlier: shift the merge key back, merge, then restore it
	replace year = year - 1
	merge m:1 lopnr year using "MYPATH\MYPATH.dta", ///
	  keepusing(educ) assert(1 2 3) keep(1 3) nogen
	replace year = year + 1
	replace educ = educ + 1 if !mi(educ) // code as 1, 2, 3 instead of 0, 1, 2
	label values educ lab_educ
	label variable educ "education year before due date"

	* Foreign born: 1 = born in "Sverige", 2 = anything else.
	* An empty/unmatched birth place is therefore coded as foreign.
	merge m:1 lopnr using "MYPATH\MYPATH.dta", ///
	  keepusing(fodelseland_fodgreg4 ) assert(1 2 3) keep(1 3) nogen
	rename fodelseland_fodgreg4 mother_birth_place
	gen mom_foreign = 1 + (mother_birth_place != "Sverige" | mi(mother_birth_place))
	label define lab_for 1 "Swedish" 2 "Foreign"
	label values mom_foreign lab_for
	drop mother_birth_place

end


* Execute *
* Run the build, then always close the log opened at the top of this file,
* even when main fails; the original return code is re-raised afterwards so
* callers still see the error.
capture noisily main
local main_rc = _rc
capture log close
if `main_rc' exit `main_rc'
